{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: pandas in c:\\users\\23326\\anaconda3\\envs\\gg\\lib\\site-packages (2.2.2)\n",
      "Requirement already satisfied: numpy>=1.26.0 in c:\\users\\23326\\anaconda3\\envs\\gg\\lib\\site-packages (from pandas) (2.0.0)\n",
      "Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\23326\\anaconda3\\envs\\gg\\lib\\site-packages (from pandas) (2.9.0)\n",
      "Requirement already satisfied: pytz>=2020.1 in c:\\users\\23326\\anaconda3\\envs\\gg\\lib\\site-packages (from pandas) (2024.1)\n",
      "Requirement already satisfied: tzdata>=2022.7 in c:\\users\\23326\\anaconda3\\envs\\gg\\lib\\site-packages (from pandas) (2024.1)\n",
      "Requirement already satisfied: six>=1.5 in c:\\users\\23326\\anaconda3\\envs\\gg\\lib\\site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "# Pin pandas to the version this notebook was last executed with (2.2.2), so a\n",
    "# fresh environment installs the same release; -q keeps the install log short.\n",
    "%pip install -q pandas==2.2.2\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Forward-slash relative paths resolve on Windows and POSIX alike and need no\n",
    "# backslash escaping inside the string literal.\n",
    "file_path = \"./positions.csv\"\n",
    "output_file_path = \"./cleand.csv\"\n",
    "\n",
    "# Column names are supplied explicitly, so pandas treats every row as data.\n",
    "# NOTE(review): if positions.csv carries its own header row, that row will be\n",
    "# read as a record - confirm against the file.\n",
    "df = pd.read_csv(\n",
    "    file_path,\n",
    "    sep=\",\",\n",
    "    names=[\"岗位名称\", \"薪水\", \"公司名\", \"岗位类型\", \"岗位大类\", \"学历\", \"经验\", \"技能\", \"城市\", \"行业\"],\n",
    ")\n",
    "\n",
    "# Trim surrounding whitespace from every string cell; other values pass through.\n",
    "# DataFrame.map is the replacement for DataFrame.applymap, which is deprecated\n",
    "# since pandas 2.1.\n",
    "df = df.map(lambda x: x.strip() if isinstance(x, str) else x)\n",
    "\n",
    "def convert_salary(salary):\n",
    "    \"\"\"Return the lower bound of a K-denominated salary range as a float.\n",
    "\n",
    "    `10K-20K` and `10-20K` both become 10000.0; a lowercase `k` is accepted too.\n",
    "    Values that are not strings (e.g. NaN for a missing cell), strings without\n",
    "    a K, and strings whose lower bound is not numeric are returned unchanged.\n",
    "    \"\"\"\n",
    "    # Missing cells arrive as float NaN; `in` on a float would raise TypeError.\n",
    "    if not isinstance(salary, str):\n",
    "        return salary\n",
    "    if \"K\" not in salary and \"k\" not in salary:\n",
    "        return salary\n",
    "    # Keep only the part before the first dash and drop the unit suffix.\n",
    "    lower_bound = salary.split(\"-\")[0].strip().strip(\"Kk\")\n",
    "    try:\n",
    "        return float(lower_bound) * 1000\n",
    "    except ValueError:\n",
    "        # Not a numeric range after all (e.g. free text that contains a k).\n",
    "        return salary\n",
    "\n",
    "# Run every salary value through convert_salary.\n",
    "salary_converted = df[\"薪水\"].apply(convert_salary)\n",
    "df[\"薪水\"] = salary_converted\n",
    "\n",
    "# Remove the literal label prefix from the two category columns, then trim.\n",
    "for column, prefix in ((\"岗位类型\", \"岗位类型:\"), (\"岗位大类\", \"岗位大类:\")):\n",
    "    without_prefix = df[column].str.replace(prefix, \"\", regex=False)\n",
    "    df[column] = without_prefix.str.strip()\n",
    "\n",
    "# Missing skill entries are filled with the placeholder string.\n",
    "df[\"技能\"] = df[\"技能\"].fillna(\"无\")\n",
    "\n",
    "# Drop fully duplicated rows, then write the result without the index column.\n",
    "df = df.drop_duplicates()\n",
    "df.to_csv(output_file_path, index=False, encoding='utf-8')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
